#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2010,2011 Marcel Martin <marcel.martin@tu-dortmund.de>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

"""%prog [options] <FASTA/FASTQ FILE> [<QUALITY FILE>]

Reads a FASTA or FASTQ file, finds and removes adapters,
and writes the changed sequence to standard output.
When finished, statistics are printed to standard error.

Use a dash "-" as file name to read from standard input
(FASTA/FASTQ is autodetected).

If two file names are given, the first must be a .fasta or .csfasta
file and the second must be a .qual file. This is the file format
used by some 454 software and by the SOLiD sequencer.
If you have color space data, you still need to provide the -c option
to correctly deal with color space!

If the name of any input or output file ends with '.gz', it is
assumed to be gzip-compressed.

If you want to search for the reverse complement of an adapter, you must
provide an additional adapter sequence using two '-a' parameters.

If the input sequences are in color space, the adapter must
also be provided in color space (using a string of digits 0123).

EXAMPLE

Assuming your sequencing data is available as a FASTQ file, use this
command line:
$ cutadapt -e ERROR-RATE -a ADAPTER-SEQUENCE input.fastq > output.fastq

See the README file for more help and examples."""

from __future__ import print_function, division

import sys
import re
import gzip
import time
from os.path import dirname, join, isfile, realpath
if sys.version_info[0] < 3:
	from string import maketrans
else:
	maketrans = bytes.maketrans
	xrange = range
from optparse import OptionParser, OptionGroup
from contextlib import closing
from collections import defaultdict

# If running from within source directory,
# add 'lib' to sys.path.
_libdir = join(dirname(realpath(__file__)), 'lib')
if isfile(join(_libdir, 'cutadapt', '__init__.py')):
	sys.path.insert(0, _libdir)

from cutadapt import align, seqio
from cutadapt.xopen import xopen
from cutadapt.qualtrim import quality_trim_index
from cutadapt import __version__

# Constants for the find_best_alignment function.
# The function is called with SEQ1 as the adapter, SEQ2 as the read.
BACK = align.START_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ1
FRONT = align.START_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ2 | align.START_WITHIN_SEQ1
ANYWHERE = align.SEMIGLOBAL


class HelpfulOptionParser(OptionParser):
	"""An OptionParser that prints full help on errors."""
	def error(self, msg):
		self.print_help(sys.stderr)
		self.exit(2, "\n%s: error: %s\n" % (self.get_prog_name(), msg))


def print_histogram(d):
	"""d -- a dictionary mapping values to their respective frequency"""
	print("length", "count", sep="\t")
	for key in sorted(d):
		print(key, d[key], sep="\t")
	print()


class Statistics(object):
	"""Store statistics about reads and adapters"""

	def __init__(self, adapters):
		self.reads_changed = 0
		self.too_short = 0
		self.too_long = 0
		self.n = 0
		self._start_time = time.clock()
		self.time = None
		self.lengths_front = []
		self.lengths_back = []
		self.adapters = adapters
		for a in adapters:
			self.lengths_front.append(defaultdict(int))
			self.lengths_back.append(defaultdict(int))

	def stop_clock(self):
		"""Stop the timer that was automatically started when the class was instantiated."""
		self.time = time.clock() - self._start_time

	def print_statistics(self, error_rate):
		"""Print summary to stdout"""
		if self.time is None:
			self.stop_clock()
		print("cutadapt version", __version__)
		print("Command line parameters:", " ".join(sys.argv[1:]))
		print("Maximum error rate: %.2f%%" % (error_rate * 100.))
		print("   Processed reads:", self.n)
		if self.n > 0:
			print("     Trimmed reads:", self.reads_changed, "(%5.1f%%)" % (100. * self.reads_changed / self.n))
			print("   Too short reads:", self.too_short, "(%5.1f%% of processed reads)" % (100. * self.too_short / self.n))
			print("    Too long reads:", self.too_long, "(%5.1f%% of processed reads)" % (100. * self.too_long / self.n))
		print("        Total time: %9.2f s" % self.time)
		if self.n > 0:
			print("     Time per read: %9.2f ms" % (1000. * self.time / self.n))
		print()
		for index, (where, adapter) in enumerate(self.adapters):
			total_front = sum(self.lengths_front[index].values())
			total_back = sum(self.lengths_back[index].values())
			total = total_front + total_back
			assert where == ANYWHERE or (where == BACK and total_front == 0) or (where == FRONT and total_back == 0)

			print("=" * 3, "Adapter", index+1, "=" * 3)
			print()
			print("Adapter '%s'," % adapter.upper(), "length %d," % len(adapter), "was trimmed", total, "times.")
			if where == ANYWHERE:
				print(total_front, "times, it overlapped the 5' end of a read")
				print(total_back, "times, it overlapped the 3' end or was within the read")
				print()
				print("Histogram of adapter lengths (5')")
				print_histogram(self.lengths_front[index])
				print()
				print("Histogram of adapter lengths (3' or within)")
				print_histogram(self.lengths_back[index])
			elif where == FRONT:
				print()
				print("Histogram of adapter lengths")
				print_histogram(self.lengths_front[index])
			else:
				assert where == BACK
				print()
				print("Histogram of adapter lengths")
				print_histogram(self.lengths_back[index])

		if self.n == 0:
			print("No reads were read! Either your input file is empty or you used the wrong -f/--format parameter.")


def find_best_alignment(adapters, seq, max_error_rate, minimum_overlap, match_read_wildcard):
	"""
	Find the best matching adapter.

	adapters -- List of tuples (where, adapter). 'where' is either BACK or
	            ANYWHERE. The adapter itself is a string.
	seq -- The sequence to which each adapter will be aligned
	where -- Where in the sequence the adapter may be found.
		One of the FRONT, BACK and ANYWHERE constants. For all,
		the adapter will also be found if it is in the middle.
	max_error_rate -- Maximum allowed error rate. The error rate is
		the number of errors in the alignment divided by the length
		of the part of the alignment that matches the adapter.
	minimum_overlap -- Minimum length of the part of the alignment
		that matches the adapter.
	match_read_wildcard -- Whether wildcards ('N' characters) in the read
		are matches (at zero cost).

	Return tuple (best_alignment, best_index).

	best_alignment is an alignment as returned by semiglobalalign.
	best_index is the index of the best adapter into the adapters list.
	"""
	best_result = None
	best_matches = 0
	best_index = None
	for index, (where, adapter) in enumerate(adapters):
		# try to find an exact match first
		pos = seq.find(adapter)
		if pos >= 0:
			result = (0, len(adapter), pos, pos + len(adapter), len(adapter), 0)
		else:
			n_mode = align.ALLOW_WILDCARD_SEQ2 if match_read_wildcard else 0
			if 'N' in adapter:
				n_mode = n_mode | align.ALLOW_WILDCARD_SEQ1
			result = align.globalalign_locate(adapter, seq, max_error_rate, where, n_mode)
		(astart, astop, rstart, rstop, matches, errors) = result
		length = astop - astart
		assert length == 0 or errors / length <= max_error_rate
		if length < minimum_overlap:
			continue

		# the no. of matches determines which adapter fits best
		if matches > best_matches:
			best_result = result
			best_matches = matches
			best_index = index
	return (best_result, best_index)


def write_read(read, outfile, twoheaders=False):
	"""
	Write read in either FASTA or FASTQ format
	(depending on whether qualities is None or not) to outfile

	If twoheaders is True and the output is FASTQ, then the sequence name
	(description) is also written after the "+" character in the third line.

	"""
	if read.qualities is None:
		# FASTA
		print('>%s\n%s' % (read.name, read.sequence), file=outfile)
	else:
		# FASTQ
		tmp = read.name if twoheaders else ''
		print('@%s\n%s\n+%s\n%s' % (read.name, read.sequence, tmp, read.qualities), file=outfile)


def read_sequences(seqfilename, qualityfilename, colorspace, fileformat):
	"""
	Read sequences and (if available) quality information from either:
	* seqfilename in FASTA format (qualityfilename must be None)
	* seqfilename in FASTQ format (qualityfilename must be None)
	* seqfilename in .csfasta format and qualityfilename in .qual format
	  (SOLiD color space)

	Return a generator over tuples (description, sequence, qualities).
	qualities is None if no qualities are available.
	qualities are ASCII-encoded (chr(quality) + 33).
	"""
	#if ftype == 'FASTQ' and qualityfilename is not None:
		#raise ValueError("If a FASTQ file is given, no quality file can be provided.")

	if qualityfilename is not None:
		# read from .(CS)FASTA/.QUAL
		return seqio.FastaQualReader(seqfilename, qualityfilename, colorspace)
	else:
		# read from FASTA or FASTQ
		return seqio.SequenceReader(seqfilename, colorspace, fileformat)


class ReadFilter(object):
	"""Filter reads according to length and according to whether any adapter matches."""

	def __init__(self, minimum_length, maximum_length, too_short_outfile, discard_trimmed, statistics):
		self.minimum_length = minimum_length
		self.maximum_length = maximum_length
		self.too_short_outfile = too_short_outfile
		self.statistics = statistics
		self.discard_trimmed = discard_trimmed

	def keep(self, read, trimmed):
		"""
		Return whether to keep the given read.
		"""
		if self.discard_trimmed and trimmed:
			return False
		if len(read.sequence) < self.minimum_length:
			self.statistics.too_short += 1
			if self.too_short_outfile is not None:
				write_read(read, self.too_short_outfile)
			return False
		if len(read.sequence) > self.maximum_length:
			self.statistics.too_long += 1
			return False
		return True


class LengthTagModifier:
	"""
	Replace "length=..." strings in read names.
	"""
	def __init__(self, length_tag):
		self.regex = re.compile(r"\b" + length_tag + r"[0-9]*\b")
		self.length_tag = length_tag

	def apply(self, read):
		read = read[:]
		if read.name.find(self.length_tag) >= 0:
			read.name = self.regex.sub(self.length_tag + str(len(read.sequence)), read.name)
		return read


class SuffixRemover:
	"""
	Remove any suffix from read names.
	"""
	def __init__(self, suffix):
		self.suffix = suffix

	def apply(self, read):
		read = read[:]
		if read.name.endswith(self.suffix):
			read.name = read.name[:-3]
		return read


class PrefixSuffixAdder:
	"""
	Add a suffix and a prefix to read names
	"""
	def __init__(self, prefix, suffix):
		self.prefix = prefix
		self.suffix = suffix

	def apply(self, read):
		read = read[:]
		read.name = self.prefix + read.name + self.suffix
		return read


class DoubleEncoder:
	"""
	Double-encode colorspace reads, using characters ACGTN to represent colors.
	"""
	def __init__(self):
		self.DOUBLE_ENCODE_TRANS = maketrans(b'0123.', b'ACGTN')

	def apply(self, read):
		read = read[:]
		read.sequence = read.sequence.translate(self.DOUBLE_ENCODE_TRANS)
		return read


class ZeroCapper:
	"""
	Change negative quality values of a read to zero
	"""
	def __init__(self):
		if sys.version_info[0] < 3:
			self.ZERO_CAP_TRANS = maketrans(''.join(map(chr, range(33))), chr(33) * 33)
		else:
			self.ZERO_CAP_TRANS = maketrans(bytes(range(33)), b'\x21' * 33)

	def apply(self, read):
		read = read[:]
		read.qualities = read.qualities.translate(self.ZERO_CAP_TRANS)
		return read


class AdapterCutter(object):
	"""Cut adapters from reads."""

	def __init__(self, times, error_rate, overlap, rest_file, colorspace, adapters,
					match_read_wildcards, wildcard_file):
		"""Initialize this cutter.
		adapters --
			List of tuples (where, adapter). 'where' is either BACK
			or ANYWHERE, and adapter is the adapter as a string.
			Adapters will be converted to uppercase before aligning
			them.
		"""
		self.times = times
		self.error_rate = error_rate
		self.overlap = overlap
		self.rest_file = rest_file
		self.colorspace = colorspace
		self.adapters = [ (where, adapter.upper()) for (where, adapter) in adapters ]
		self.stats = Statistics(adapters)
		self.match_read_wildcards = match_read_wildcards
		self.wildcard_file = wildcard_file

	def cut(self, read):
		"""
		Cut adapters from a single read. The read will be converted to uppercase
		before it is compared to the adapter sequences.

		seq -- sequence of the read
		desc -- description of the read
		qualities -- quality values of the read

		Return a tuple (read, trimmed) with the modified read.
		trimmed is True when any adapter was found and trimmed.
		"""
		self.stats.n += 1

		if __debug__:
			old_length = len(read.sequence)

		# try (possibly more than once) to remove an adapter
		any_adapter_matches = False
		for t in xrange(self.times):
			alignment, index = find_best_alignment(self.adapters, read.sequence.upper(),
				self.error_rate, self.overlap, self.match_read_wildcards)
			if alignment is None:
				# nothing found
				break

			(astart, astop, rstart, rstop, matches, errors) = alignment
			length = astop - astart
			assert length > 0
			assert errors / length <= self.error_rate
			assert length - errors > 0

			any_adapter_matches = True
			where = self.adapters[index][0]

			if where == BACK or (where == ANYWHERE and astart == 0 and rstart > 0):
				assert where == ANYWHERE or astart == 0
				# The adapter is at the end of the read or within the read
				if rstop < len(read.sequence):
					# The adapter is within the read
					if self.rest_file is not None:
						print(read.sequence[rstop:], file=self.rest_file)
				self.stats.lengths_back[index][length] += 1

				if self.colorspace:
					# trim one more color if long enough
					rstart = max(0, rstart - 1)
				trimmed, read = read[rstart:], read[:rstart]
			elif where == FRONT or where == ANYWHERE:
				# The adapter is in the beginning of the read
				assert where == FRONT or rstart == 0

				self.stats.lengths_front[index][length] += 1
				# TODO What should we do in color space?
				trimmed, read = read[:rstop], read[rstop:]
			else:
				assert False

			if trimmed and self.wildcard_file is not None:
				seq_match = trimmed.sequence
				adap_match = self.adapters[index][1]
				wildcards = [seq_match[i] for i in range(astart, astop)
								if adap_match[i] == 'N']
				print(''.join(wildcards), trimmed.name, file=self.wildcard_file)

		# if an adapter was found, then the read should now be shorter
		assert (not any_adapter_matches) or (len(read.sequence) < old_length)
		if any_adapter_matches: # TODO move to filter class
			self.stats.reads_changed += 1

		return (read, any_adapter_matches)


#class QualityTrimmer:
	#"""Pipeline step that trims qualities"""
	#def __init__(self, cutoff):
		#self.cutoff = cutoff

	#def process(self, read):
		#index = quality_trim_index(qualities, options.quality_cutoff)
		##total_quality_trimmed += len(qualities) - index
		#qualities = qualities[:index]
		#seq = seq[:index]



def main():
	"""Main function that evaluates command-line parameters and contains the main loop over all reads."""
	parser = HelpfulOptionParser(usage=__doc__, version=__version__)

	parser.add_option("-f", "--format", default=None,
		help="Input file format; can be either 'fasta' or 'fastq'. "
		"Ignored when reading csfasta/qual files (default: auto-detect from file name extension).")

	group = OptionGroup(parser, "Options that influence how the adapters are found")
	group.add_option("-a", "--adapter", action="append", metavar="ADAPTER", dest="adapters",
		help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If multiple -a, -b or -g options are given, only the best matching adapter is trimmed.")
	group.add_option("-b", "--anywhere", action="append", metavar="ADAPTER",
		help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a, -b or -g options are given, only the best matching adapter is trimmed.")
	group.add_option("-g", "--front", action="append", metavar="ADAPTER",
		help="Sequence of an adapter that was ligated to the 5' end. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a, -b or -g options are given, only the best matching adapter is trimmed.")
	group.add_option("-e", "--error-rate", type=float, default=0.1,
		help="Maximum allowed error rate (no. of errors divided by the length of the matching region) (default: %default)")
	group.add_option("-n", "--times", type=int, metavar="COUNT", default=1,
		help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times.")
	group.add_option("-O", "--overlap", type=int, metavar="LENGTH", default=3,
		help="Minimum overlap length. If the overlap between the read and the adapter is shorter than LENGTH, the read is not modified."
			"This reduces the no. of bases trimmed purely due to short random adapter matches (default: %default).")
	group.add_option("--match-read-wildcards", action="store_true", default=False,
		help="Allow 'N's in the read as matches to the adapter (default: %default).")
	parser.add_option_group(group)

	group = OptionGroup(parser, "Options for filtering of processed reads")
	group.add_option("--discard-trimmed", "--discard", action='store_true', default=False,
		help="Discard reads that contain the adapter instead of trimming them. Also use -O in order to avoid throwing away too many randomly matching reads!")
	group.add_option("-m", "--minimum-length", type=int, default=0, metavar="LENGTH",
		help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted.")
	group.add_option("-M", "--maximum-length", type=int, default=sys.maxsize, metavar="LENGTH",
		help="Discard trimmed reads that are longer than LENGTH. "
			"Reads that are too long even before adapter removal "
			"are also discarded. In colorspace, an initial primer "
			"is not counted.")
	parser.add_option_group(group)

	group = OptionGroup(parser, "Options that influence what gets output to where")
	group.add_option("-o", "--output", default=None, metavar="FILE",
		help="Write the modified sequences to this file instead of standard output and send the summary report to standard output. "
		     "The format is FASTQ if qualities are available, FASTA otherwise. (default: standard output)")
	group.add_option("-r", "--rest-file", default=None, metavar="FILE",
		help="When the adapter matches in the middle of a read, write the rest (after the adapter) into a file. Use - for standard output.")
	group.add_option("--wildcard-file", default=None, metavar="FILE",
		help="When the adapter has wildcard bases ('N's) write adapter bases matching wildcard "
		     "positions to FILE.  Use - for standard output.")
	group.add_option("--too-short-output", default=None, metavar="FILE",
		help="Write reads that are too short (according to length specified by -m) to FILE. (default: discard reads)")
	group.add_option("--untrimmed-output", default=None, metavar="FILE",
		help="Write reads that do not contain the adapter to FILE, instead "
			"of writing them to the regular output file. (default: output "
			"to same file as trimmed)")
	parser.add_option_group(group)

	group = OptionGroup(parser, "Additional modifications to the reads")
	group.add_option("-q", "--quality-cutoff", type=int, default=None, metavar="CUTOFF",
		help="Trim low-quality ends from reads before adapter removal. "
			"The algorithm is the same as the one used by BWA "
			"(Subtract CUTOFF from all qualities; "
			"compute partial sums from all indices to the end of the "
			"sequence; cut sequence at the index at which the sum "
			"is minimal) (default: %default)")
	group.add_option("--quality-base", type=int, default=33,
		help="Assume that quality values are encoded as ascii(quality + QUALITY_BASE). The default (33) is usually correct, "
		     "except for reads produced by some versions of the Illumina pipeline, where this should be set to 64. (default: %default)")
	group.add_option("-x", "--prefix", default='',
		help="Add this prefix to read names")
	group.add_option("-y", "--suffix", default='',
		help="Add this suffix to read names")
	group.add_option("-c", "--colorspace", action='store_true', default=False,
		help="Colorspace mode: Also trim the color that is adjacent to the found adapter.")
	group.add_option("-d", "--double-encode", action='store_true', default=False,
		help="When in color space, double-encode colors (map 0,1,2,3,4 to A,C,G,T,N).")
	group.add_option("-t", "--trim-primer", action='store_true', default=False,
		help="When in color space, trim primer base and the first color "
			"(which is the transition to the first nucleotide)")
	group.add_option("--strip-f3", action='store_true', default=False,
		help="For color space: Strip the _F3 suffix of read names")
	group.add_option("--maq", "--bwa", action='store_true', default=False,
		help="MAQ- and BWA-compatible color space output. This enables -c, -d, -t, --strip-f3, -y '/1' and -z.")
	group.add_option("--length-tag", default=None, metavar="TAG",
		help="Search for TAG followed by a decimal number in the name of the read "
			"(description/comment field of the FASTA or FASTQ file). Replace the "
			"decimal number with the correct length of the trimmed read. "
			"For example, use --length-tag 'length=' to search for fields "
			"like 'length=123'.")
	group.add_option("--zero-cap", "-z", action='store_true', default=False,
		help="Change negative quality values to zero (workaround to avoid segmentation faults in BWA)")
	parser.add_option_group(group)

	options, args = parser.parse_args()

	if len(args) == 0:
		parser.error("At least one parameter needed: name of a FASTA or FASTQ file.")
	elif len(args) > 2:
		parser.error("Too many parameters.")

	input_filename = args[0]
	quality_filename = None
	if len(args) == 2:
		quality_filename = args[1]

	if options.format is not None and options.format.lower() not in ['fasta', 'fastq']:
		parser.error("The input file format must be either 'fasta' or 'fastq' (not '{0}').".format(options.format))

	# TODO should this really be an error?
	if options.format is not None and quality_filename is not None:
		parser.error("If a pair of .fasta and .qual files is given, the -f/--format parameter cannot be used.")

	# default output files (overwritten below)
	trimmed_outfile = sys.stdout # reads with adapters go here
	too_short_outfile = None # too short reads go here
	#too_long_outfile = None # too long reads go here

	if options.output is not None:
		trimmed_outfile = xopen(options.output, 'w')
	untrimmed_outfile = trimmed_outfile # reads without adapters go here
	if options.untrimmed_output is not None:
		untrimmed_outfile = xopen(options.untrimmed_output, 'w')
	if options.too_short_output is not None:
		too_short_outfile = xopen(options.too_short_output, 'w')
	#if options.too_long_output is not None:
		#too_long_outfile = xopen(options.too_long_output, 'w')

	if options.maq:
		options.colorspace = True
		options.double_encode = True
		options.trim_primer = True
		options.strip_f3 = True
		options.suffix = "/1"
		options.zero_cap = True
	if options.trim_primer and not options.colorspace:
		parser.error("Trimming the primer makes only sense in color space.")
	if options.double_encode and not options.colorspace:
		parser.error("Double-encoding makes only sense in color space.")
	if (options.anywhere or options.front) and options.colorspace:
		parser.error("Using --anywhere or --front with color space reads is currently not supported  (if you think this may be useful, contact the author).")
	if not (0 <= options.error_rate <= 1.):
		parser.error("The maximum error rate must be between 0 and 1.")

	if options.rest_file is not None:
		options.rest_file = xopen(options.rest_file, 'w')
	if options.wildcard_file is not None:
		options.wildcard_file = xopen(options.wildcard_file, 'w')

	adapters = []
	if options.adapters:
		adapters = [ (BACK, adapter) for adapter in options.adapters ]
	if options.anywhere:
		adapters += [ (ANYWHERE, adapter) for adapter in options.anywhere ]
	if options.front:
		adapters += [ (FRONT, adapter) for adapter in options.front ]

	# make sure these aren't used by accident
	del options.adapters
	del options.anywhere
	del options.front

	if not adapters:
		print("You need to provide at least one adapter sequence.", file=sys.stderr)
		return 1

	#total_bases = 0
	#total_quality_trimmed = 0

	modifiers = []
	if options.length_tag:
		modifiers.append(LengthTagModifier(options.length_tag))
	if options.strip_f3:
		modifiers.append(SuffixRemover('_F3'))
	if options.prefix or options.suffix:
		modifiers.append(PrefixSuffixAdder(options.prefix, options.suffix))
	if options.double_encode:
		modifiers.append(DoubleEncoder())
	if options.zero_cap:
		modifiers.append(ZeroCapper())

	cutter = AdapterCutter(options.times, options.error_rate, options.overlap, options.rest_file,
				options.colorspace, adapters, options.match_read_wildcards, options.wildcard_file)
	readfilter = ReadFilter(options.minimum_length, options.maximum_length,
		too_short_outfile, options.discard_trimmed, cutter.stats) # TODO stats?
	try:
		twoheaders = None
		reader = read_sequences(input_filename, quality_filename, colorspace=options.colorspace, fileformat=options.format)
		for read in reader:
			# In colorspace, the first character is the last nucleotide of the primer base
			# and the second character encodes the transition from the primer base to the
			# first real base of the read.
			if options.trim_primer:
				read.sequence = read.sequence[2:]
				if read.qualities is not None: # TODO
					read.qualities = read.qualities[1:]
				initial = ''
			elif options.colorspace:
				initial = read.sequence[0]
				read.sequence = read.sequence[1:]
			else:
				initial = ''


			#total_bases += len(qualities)
			if options.quality_cutoff is not None:
				index = quality_trim_index(read.qualities, options.quality_cutoff, options.quality_base)
				read = read[:index]

			read, trimmed = cutter.cut(read)
			for modifier in modifiers:
				read = modifier.apply(read)
			if twoheaders is None:
				try:
					twoheaders = reader.twoheaders
				except AttributeError:
					twoheaders = False
			if readfilter.keep(read, trimmed):
				read.sequence = initial + read.sequence
				write_read(read, trimmed_outfile if trimmed else untrimmed_outfile, twoheaders)
	except seqio.FormatError as e:
		print("Error:", e, file=sys.stderr)
		return 1
	if options.output is None:
		sys.stdout = sys.stderr
	if options.rest_file is not None:
		options.rest_file.close()
	cutter.stats.print_statistics(options.error_rate)
	sys.stdout = sys.__stdout__

	return 0


if __name__ == '__main__':
	if len(sys.argv) > 1 and sys.argv[1] == '--profile':
		del sys.argv[1]
		import cProfile as profile
		profile.run('main()', 'cutadapt.prof')
	else:
		sys.exit(main())
